package com.elookinto.util;

import java.text.BreakIterator;

import java.util.*;

/**
 * Extracts the distinct word terms from a free-text query string.
 *
 * <p>The query is segmented with {@link BreakIterator#getWordInstance()} (default locale).
 * Segments that contain no letter at all (whitespace, punctuation, digit-only runs) are
 * discarded; every remaining segment is reported once.
 *
 * <p>Originally created Aug 22, 2009.
 */
public class QueryTermExtractor {

    /**
     * Splits {@code query} into word segments and returns the distinct segments that
     * contain at least one letter.
     *
     * <p>Comparison is exact (case-sensitive): {@code "This"} and {@code "this"} are
     * reported as two separate terms.
     *
     * @param query the text to analyse; may be {@code null}
     * @return a new, mutable list of the distinct terms in order of first occurrence;
     *         empty when {@code query} is {@code null} or contains no letters; never {@code null}
     */
    public static List<String> extract(String query) {
        // A missing query simply has no terms; avoid the NullPointerException that
        // BreakIterator.setText would otherwise raise.
        if (query == null) {
            return new ArrayList<String>();
        }

        BreakIterator boundary = BreakIterator.getWordInstance();
        boundary.setText(query);

        // LinkedHashSet removes duplicates while keeping first-occurrence order, so the
        // result is deterministic instead of depending on hash iteration order.
        Set<String> terms = new LinkedHashSet<String>();

        int start = boundary.first();
        for (int end = boundary.next();
             end != BreakIterator.DONE;
             start = end, end = boundary.next()) {
            if (containsLetter(query, start, end)) {
                terms.add(query.substring(start, end));
            }
        }

        return new ArrayList<String>(terms);
    }

    /**
     * Reports whether {@code text[start, end)} contains at least one letter.
     *
     * @param text  the text to inspect
     * @param start index of the first char to inspect (inclusive)
     * @param end   index after the last char to inspect (exclusive)
     * @return {@code true} as soon as a letter code point is found, {@code false} otherwise
     */
    private static boolean containsLetter(String text, int start, int end) {
        int pos = start;
        while (pos < end) {
            int codePoint = text.codePointAt(pos);
            if (Character.isLetter(codePoint)) {
                return true;
            }
            // Step over both halves of a surrogate pair at once.
            pos += Character.charCount(codePoint);
        }
        return false;
    }

    /**
     * Small manual demo: prints a sample query followed by each extracted term on its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String query = "this is testing ???? ?, () + ???";
        System.out.println(query);
        List<String> words = extract(query);
        for (String word : words) {
            System.out.println(word);
        }
    }
}
